import capstone
import binwalk.core.common
import binwalk.core.compat
from binwalk.core.module import Module, Option, Kwarg

class ArchResult(object):
    """Attribute bag describing one candidate architecture match.

    Every keyword argument passed to the constructor becomes an instance
    attribute of the same name. The scanner in this file creates instances
    with ``offset``, ``description``, ``insns`` and ``count``.
    """

    def __init__(self, **kwargs):
        # binwalk's compat helper is iterated as (name, value) pairs.
        pairs = binwalk.core.compat.iterator(kwargs)
        for name, value in pairs:
            setattr(self, name, value)

class Architecture(object):
    """Attribute bag describing one architecture to try when disassembling.

    Every keyword argument passed to the constructor becomes an instance
    attribute of the same name. The table in this file creates instances
    with ``type``, ``mode``, ``endianess`` and ``description``.
    """

    def __init__(self, **kwargs):
        # binwalk's compat helper is iterated as (name, value) pairs.
        settings = binwalk.core.compat.iterator(kwargs)
        for attr_name, attr_value in settings:
            setattr(self, attr_name, attr_value)

class Disasm(Module):
    """Binwalk module that guesses a file's CPU architecture using capstone.

    Each target file is read block by block. At every byte offset of a block
    a slice of the data is fed to one capstone disassembler per entry in
    ARCHITECTURES; an architecture "matches" at that offset when at least
    ``min_insn_count`` instructions are decoded from the slice.
    """

    # Once the same architecture has matched this many times within one data
    # block, the scan of that block stops and the match is reported.
    THRESHOLD = 10
    # Used when no (or a zero) minimum instruction count is configured.
    DEFAULT_MIN_INSN_COUNT = 500

    TITLE = "Disassembly Scan"
    ORDER = 10

    CLI = [
        Option(short='Y',
               long='disasm',
               kwargs={'enabled': True},
               description='Identify the CPU architecture of a file using the capstone disassembler'),
        Option(short='T',
               long='minsn',
               type=int,
               kwargs={'min_insn_count': 0},
               description='Minimum number of consecutive instructions to be considered valid (default: %d)' % DEFAULT_MIN_INSN_COUNT),
        Option(long='continue',
               short='k',
               kwargs={'keep_going': True},
               description="Don't stop at the first match"),
    ]

    KWARGS = [
        Kwarg(name='enabled', default=False),
        Kwarg(name='keep_going', default=False),
        Kwarg(name='min_insn_count', default=DEFAULT_MIN_INSN_COUNT),
    ]

    # Architectures tried at every offset, in this order. ``mode`` and
    # ``endianess`` are capstone CS_MODE_* flags that init() combines into the
    # single mode value passed to capstone.Cs().
    ARCHITECTURES = [
        Architecture(type=capstone.CS_ARCH_ARM,
                     mode=capstone.CS_MODE_ARM,
                     endianess=capstone.CS_MODE_BIG_ENDIAN,
                     description="ARM executable code, 32-bit, big endian"),
        Architecture(type=capstone.CS_ARCH_ARM,
                     mode=capstone.CS_MODE_ARM,
                     endianess=capstone.CS_MODE_LITTLE_ENDIAN,
                     description="ARM executable code, 32-bit, little endian"),
        Architecture(type=capstone.CS_ARCH_ARM64,
                     mode=capstone.CS_MODE_ARM,
                     endianess=capstone.CS_MODE_BIG_ENDIAN,
                     description="ARM executable code, 64-bit, big endian"),
        Architecture(type=capstone.CS_ARCH_ARM64,
                     mode=capstone.CS_MODE_ARM,
                     endianess=capstone.CS_MODE_LITTLE_ENDIAN,
                     description="ARM executable code, 64-bit, little endian"),

        Architecture(type=capstone.CS_ARCH_PPC,
                     mode=capstone.CS_MODE_BIG_ENDIAN,
                     endianess=capstone.CS_MODE_BIG_ENDIAN,
                     description="PPC executable code, 32/64-bit, big endian"),

        Architecture(type=capstone.CS_ARCH_MIPS,
                     mode=capstone.CS_MODE_64,
                     endianess=capstone.CS_MODE_BIG_ENDIAN,
                     description="MIPS executable code, 32/64-bit, big endian"),
        Architecture(type=capstone.CS_ARCH_MIPS,
                     mode=capstone.CS_MODE_64,
                     endianess=capstone.CS_MODE_LITTLE_ENDIAN,
                     description="MIPS executable code, 32/64-bit, little endian"),

        Architecture(type=capstone.CS_ARCH_ARM,
                     mode=capstone.CS_MODE_THUMB,
                     endianess=capstone.CS_MODE_LITTLE_ENDIAN,
                     description="ARM executable code, 16-bit (Thumb), little endian"),
        Architecture(type=capstone.CS_ARCH_ARM,
                     mode=capstone.CS_MODE_THUMB,
                     endianess=capstone.CS_MODE_BIG_ENDIAN,
                     description="ARM executable code, 16-bit (Thumb), big endian"),
    ]

    def init(self):
        """Resolve the instruction threshold and build the disassemblers.

        Populates ``self.disassemblers`` with ``(capstone.Cs, description)``
        tuples, one per entry in ARCHITECTURES, and sets
        ``self.disasm_data_size`` (the number of bytes handed to capstone for
        each disassembly attempt).
        """
        self.disassemblers = []

        # A falsy count (e.g. the 0 stored by the CLI option's kwargs) means
        # "use the default".
        if not self.min_insn_count:
            self.min_insn_count = self.DEFAULT_MIN_INSN_COUNT

        self.disasm_data_size = self.min_insn_count * 10

        for arch in self.ARCHITECTURES:
            # CS_MODE_* values are bit flags and must be OR-ed together.
            # Adding them corrupts the mode whenever the same flag appears in
            # both fields: the PPC entry sets CS_MODE_BIG_ENDIAN as mode *and*
            # endianess, so the sum is no longer CS_MODE_BIG_ENDIAN.
            md = capstone.Cs(arch.type, arch.mode | arch.endianess)
            self.disassemblers.append((md, arch.description))

    def scan_file(self, fp):
        """Scan one target file and report the architectures found in it.

        Args:
            fp: The target file object. It must provide ``read_block()``,
                returning a ``(data, dlen)`` tuple, and an ``offset``
                attribute that is added to positions within the read data to
                form reported offsets.

        Returns:
            None. Matches are reported through ``self.result()``. Unless
            ``keep_going`` is set, scanning stops after the first reported
            match that is both valid and displayed.
        """
        total_read = 0

        while True:
            result = None

            (data, dlen) = fp.read_block()
            # Stop at end of input. A block that reports no consumed bytes
            # cannot advance total_read, so it is treated as the end as well.
            if not data or dlen < 1:
                break

            # If this data block doesn't contain at least two different bytes, skip it
            # to prevent false positives (e.g., "\x00\x00\x00\x00" is a nop in MIPS).
            if len(set(data)) >= 2:
                block_offset = 0

                # Loop through the entire block, or until we're pretty sure we've found some valid code in this block
                while (block_offset < dlen) and (result is None or result.count < self.THRESHOLD):
                    # Don't pass the entire data block into disasm_lite, it's horribly inefficient
                    # to pass large strings around in Python. Break it up into smaller code blocks instead.
                    code_block = binwalk.core.compat.str2bytes(data[block_offset:block_offset + self.disasm_data_size])

                    # If this code block doesn't contain at least two different bytes, skip it
                    # to prevent false positives (e.g., "\x00\x00\x00\x00" is a nop in MIPS).
                    if len(set(code_block)) >= 2:
                        # Offset of this code block as reported to the user.
                        # The same value is used as the disassembly base
                        # address so that per-instruction offsets shown in
                        # verbose mode agree with the summary result offset.
                        file_offset = total_read + block_offset + fp.offset

                        for (md, description) in self.disassemblers:
                            insns = list(md.disasm_lite(code_block, file_offset))
                            binwalk.core.common.debug("0x%.8X   %s, at least %d valid instructions" % (file_offset,
                                                                                                        description,
                                                                                                        len(insns)))

                            # Did we disassemble at least self.min_insn_count instructions?
                            if len(insns) >= self.min_insn_count:
                                # If we've already found the same type of code in this block, simply update the result counter
                                if result and result.description == description:
                                    result.count += 1
                                    if result.count >= self.THRESHOLD:
                                        break
                                else:
                                    # First match, or a different architecture
                                    # than the current candidate: start over.
                                    result = ArchResult(offset=file_offset,
                                                        description=description,
                                                        insns=insns,
                                                        count=1)

                    block_offset += 1
                    self.status.completed += 1

                if result is not None:
                    r = self.result(offset=result.offset,
                                    file=fp,
                                    description=(result.description + ", at least %d valid instructions" % len(result.insns)))

                    if r.valid and r.display:
                        if self.config.verbose:
                            # disasm_lite yields (address, size, mnemonic, operands) tuples.
                            for (position, _size, mnem, opnds) in result.insns:
                                self.result(offset=position, file=fp, description="%s %s" % (mnem, opnds))
                        if not self.keep_going:
                            return

            total_read += dlen
            # Re-sync progress with the number of bytes actually consumed.
            self.status.completed = total_read

    def run(self):
        """Scan every target file, wrapping each scan in a header and footer."""
        # next_file is called repeatedly until it returns None.
        for fp in iter(self.next_file, None):
            self.header()
            self.scan_file(fp)
            self.footer()

